This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Example chunk from the notebook template: draw the default plot of `cars`.
plot(cars)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
# Scratch arithmetic: store the sum and print it.
# NOTE(review): the variable name `c` is the same as base R's c() function;
# kept unchanged here so the notebook's workspace stays the same.
c <- 1 + 78
print(c)
[1] 79
# Scratch arithmetic: store the sum and print it.
b <- 85 + 25
print(b)
[1] 110
library(arules)
Loading required package: Matrix
Attaching package: ‘arules’
The following objects are masked from ‘package:base’:
abbreviate, write
library(arulesViz)
# Load the market-basket CSV as an arules transactions object.
#
# The file is read in "basket" format: one comma-separated basket per row.
# The first line of the file is skipped (skip = 1; presumably a header row --
# confirm against the CSV) and items repeated within a basket are removed
# (rm.duplicates = TRUE).
#
# cols = 1 marks the first field of every row as the transaction id instead
# of an item. Without it the row numbers ("1", "2", "3", ...) are read as
# items -- they show up inside the baskets printed by inspect() in this
# notebook -- which adds one spurious item to every basket and thousands of
# single-use items to the item universe.
# NOTE(review): output recorded in this notebook before this change (item
# counts, basket sizes, density) has to be regenerated by re-running.
market_basket <- read.transactions(
  file = "C:/PRASAD wORKS/R Studiio Works/data set in r-20231213T060837Z-001/data set in r/market_basket.csv",
  format = "basket",
  sep = ",",
  quote = "",
  cols = 1,
  rm.duplicates = TRUE,
  skip = 1
)
distribution of transactions with duplicates:
items
1 2 3 4 5 6 7 8 9 10 11 12
1029 473 266 159 83 61 52 32 16 15 10 11
13 14 15 16 17 18 20 22 23 25 27 34
4 3 2 1 5 1 1 1 2 1 1 1
52
1
# Print an overview of the transactions object: its dimensions and density,
# the most frequent items, and the distribution of basket sizes.
summary(market_basket)
transactions as itemMatrix in sparse format with
18440 rows (elements/itemsets/transactions) and
22346 columns (items) and a density of 0.0009915565
most frequent items:
WHITE HANGING HEART T-LIGHT HOLDER
1971
REGENCY CAKESTAND 3 TIER
1703
JUMBO BAG RED RETROSPOT
1598
PARTY BUNTING
1379
ASSORTED COLOUR BIRD ORNAMENT
1375
(Other)
400555
element (itemset/transaction) length distribution:
sizes
2 3 4 5 6 7 8 9 10 11 12 13
1359 715 602 616 660 588 568 584 608 508 542 499
14 15 16 17 18 19 20 21 22 23 24 25
475 509 543 544 466 433 477 420 409 341 327 311
26 27 28 29 30 31 32 33 34 35 36 37
239 266 250 216 264 232 196 170 165 170 144 124
38 39 40 41 42 43 44 45 46 47 48 49
128 110 123 121 111 106 94 92 83 89 82 79
50 51 52 53 54 55 56 57 58 59 60 61
73 79 62 55 64 67 62 48 53 46 47 37
62 63 64 65 66 67 68 69 70 71 72 73
45 35 31 33 32 39 38 34 23 30 30 14
74 75 76 77 78 79 80 81 82 83 84 85
25 32 24 20 18 16 9 16 18 19 16 18
86 87 88 89 90 91 92 93 94 95 96 97
15 11 14 13 9 8 11 15 12 9 5 8
98 99 100 101 102 103 104 105 106 107 108 109
10 10 3 7 11 3 9 7 2 3 3 6
110 111 112 113 114 115 116 117 118 119 120 121
4 3 3 2 5 4 4 8 5 5 5 5
122 123 124 125 126 127 128 129 131 133 134 135
4 8 5 1 4 5 3 4 2 1 4 1
136 137 138 139 140 141 142 143 144 145 147 149
1 3 2 2 1 2 2 2 2 1 5 1
150 151 152 153 156 158 159 160 165 166 167 168
1 1 2 1 1 1 1 1 1 1 1 1
170 171 172 178 179 181 182 186 188 194 195 197
1 1 1 2 2 1 2 1 1 1 1 1
205 206 209 212 221 231 251 260 264 274 284 334
1 1 1 1 1 1 1 1 1 1 1 1
340 352 357 367 380 423 441 443 530 534 548
1 1 1 1 1 1 1 1 1 1 1
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.00 8.00 16.00 22.16 28.00 548.00
includes extended item information - examples:
library(dplyr)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:arules’:
intersect, recode, setdiff, setequal, union
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
# Show the contents of the first five baskets.
inspect(head(market_basket, n = 5))
items
[1] {1,
MEDIUM CERAMIC TOP STORAGE JAR}
[2] {2,
3D DOG PICTURE PLAYING CARDS,
AIRLINE BAG VINTAGE JET SET BROWN,
ALARM CLOCK BAKELIKE CHOCOLATE,
ALARM CLOCK BAKELIKE GREEN,
ALARM CLOCK BAKELIKE ORANGE,
ALARM CLOCK BAKELIKE PINK,
ALARM CLOCK BAKELIKE RED,
BATHROOM METAL SIGN,
BLACK CANDELABRA T-LIGHT HOLDER,
BLACK EAR MUFF HEADPHONES,
BLACK GRAND BAROQUE PHOTO FRAME,
BLUE 3 PIECE POLKADOT CUTLERY SET,
BLUE DRAWER KNOB ACRYLIC EDWARDIAN,
BOOM BOX SPEAKER BOYS,
BOX OF 6 ASSORTED COLOUR TEASPOONS,
CAMOUFLAGE EAR MUFF HEADPHONES,
CLEAR DRAWER KNOB ACRYLIC EDWARDIAN,
COLOUR GLASS. STAR T-LIGHT HOLDER,
EMERGENCY FIRST AID TIN,
FOUR HOOK WHITE LOVEBIRDS,
GREEN DRAWER KNOB ACRYLIC EDWARDIAN,
LARGE HEART MEASURING SPOONS,
MINI PAINT SET VINTAGE,
PINK 3 PIECE POLKADOT CUTLERY SET,
PINK DRAWER KNOB ACRYLIC EDWARDIAN,
PURPLE DRAWERKNOB ACRYLIC EDWARDIAN,
RED 3 PIECE RETROSPOT CUTLERY SET,
RED DRAWER KNOB ACRYLIC EDWARDIAN,
RED TOADSTOOL LED NIGHT LIGHT,
SET OF 2 TINS VINTAGE BATHROOM,
SET/3 DECOUPAGE STACKING TINS}
[3] {3,
3D DOG PICTURE PLAYING CARDS,
60 TEATIME FAIRY CAKE CASES,
72 SWEETHEART FAIRY CAKE CASES,
AIRLINE BAG VINTAGE JET SET BROWN,
AIRLINE BAG VINTAGE JET SET WHITE,
ALARM CLOCK BAKELIKE CHOCOLATE,
ALARM CLOCK BAKELIKE GREEN,
ALARM CLOCK BAKELIKE ORANGE,
ALARM CLOCK BAKELIKE PINK,
ALARM CLOCK BAKELIKE RED,
BLACK CANDELABRA T-LIGHT HOLDER,
BLUE NEW BAROQUE CANDLESTICK CANDLE,
BOX OF 6 ASSORTED COLOUR TEASPOONS,
CHOCOLATE CALCULATOR,
MINI LADLE LOVE HEART RED,
PACK OF 60 MUSHROOM CAKE CASES,
PACK OF 60 SPACEBOY CAKE CASES,
PINK NEW BAROQUECANDLESTICK CANDLE,
RED RETROSPOT OVEN GLOVE,
RED RETROSPOT OVEN GLOVE DOUBLE,
RED TOADSTOOL LED NIGHT LIGHT,
REGENCY CAKESTAND 3 TIER,
SANDWICH BATH SPONGE,
SET OF 2 TINS VINTAGE BATHROOM,
SET/2 RED RETROSPOT TEA TOWELS,
SMALL HEART MEASURING SPOONS,
TEA TIME OVEN GLOVE,
TOOTHPASTE TUBE PEN,
WOODLAND CHARLOTTE BAG}
[4] {3D SHEET OF CAT STICKERS,
3D SHEET OF DOG STICKERS,
4,
AIRLINE BAG VINTAGE JET SET BROWN,
AIRLINE BAG VINTAGE JET SET RED,
AIRLINE BAG VINTAGE JET SET WHITE,
AIRLINE BAG VINTAGE TOKYO 78,
GIFT BAG PSYCHEDELIC APPLES,
HOLIDAY FUN LUDO,
ICE CREAM SUNDAE LIP GLOSS,
LARGE HEART MEASURING SPOONS,
MINI PAINT SET VINTAGE,
PACK OF 60 DINOSAUR CAKE CASES,
RED DRAWER KNOB ACRYLIC EDWARDIAN,
RED RETROSPOT OVEN GLOVE DOUBLE,
RED RETROSPOT PURSE,
RED TOADSTOOL LED NIGHT LIGHT,
REGENCY CAKESTAND 3 TIER,
ROSES REGENCY TEACUP AND SAUCER,
SET OF 2 TINS VINTAGE BATHROOM,
SMALL FOLDING SCISSOR(POINTED EDGE),
SMALL HEART MEASURING SPOONS,
TREASURE ISLAND BOOK BOX,
VINTAGE HEADS AND TAILS CARD GAME,
WATERING CAN PINK BUNNY}
[5] {3D DOG PICTURE PLAYING CARDS,
5,
AIRLINE BAG VINTAGE JET SET BROWN,
AIRLINE BAG VINTAGE TOKYO 78,
ALARM CLOCK BAKELIKE CHOCOLATE,
ALARM CLOCK BAKELIKE RED,
COAL BLACK,
FEATHER PEN,
NAMASTE SWAGAT INCENSE,
RABBIT NIGHT LIGHT,
REGENCY MILK JUG PINK,
REGENCY SUGAR BOWL GREEN,
REGENCY TEA PLATE GREEN,
REGENCY TEA PLATE PINK,
REGENCY TEA PLATE ROSES,
REGENCY TEA STRAINER,
REGENCY TEAPOT ROSES,
SMALL HEART MEASURING SPOONS,
TRIPLE HOOK ANTIQUE IVORY ROSE,
VICTORIAN SEWING KIT}
library(RColorBrewer)
library(arules)

# Horizontal bar chart of the ten most frequent items, using absolute counts
# and one colour per bar from the "Spectral" palette.
itemFrequencyPlot(
  market_basket,
  topN = 10,
  type = "absolute",
  horiz = TRUE,
  col = brewer.pal(n = 10, name = "Spectral")
)
library(RColorBrewer)
library(arules)

# Per-item frequency over all baskets in market_basket.
item_freq <- itemFrequency(market_basket)

# Keep the ten items with the highest frequency, largest first.
top_items <- head(sort(item_freq, decreasing = TRUE), n = 10)

# Draw them as a bar chart, one "Spectral" palette colour per bar.
barplot(
  height = top_items,
  main = "Top 10 Items",
  col = brewer.pal(n = 10, name = "Spectral")
)
# Mine association rules with minimum support 0.005 and minimum confidence
# 0.8, then order the resulting rule set by confidence.
rule1 <- apriori(market_basket, parameter = list(supp = 0.005, conf = 0.8))
rule1 <- sort(rule1, by = "confidence")
Apriori
Parameter specification:
Algorithmic control:
Absolute minimum support count: 92
set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[22346 item(s), 18440 transaction(s)] done [0.23s].
sorting and recoding items ... [1257 item(s)] done [0.02s].
creating transaction tree ... done [0.01s].
checking subsets of size 1 2 3 4 5 6 done [0.07s].
writing ... [561 rule(s)] done [0.01s].
creating S4 object ... done [0.00s].
# Print the number of rules in rule1, the rule-length distribution and the
# summary statistics of the quality measures.
summary(rule1)
set of 561 rules
rule length distribution (lhs + rhs):sizes
2 3 4 5 6
64 201 211 79 6
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.000 3.000 4.000 3.576 4.000 6.000
summary of quality measures:
support confidence coverage
Min. :0.005043 Min. :0.8000 Min. :0.005152
1st Qu.:0.005477 1st Qu.:0.8362 1st Qu.:0.006345
Median :0.006562 Median :0.8962 Median :0.007538
Mean :0.007055 Mean :0.8936 Mean :0.007899
3rd Qu.:0.007972 3rd Qu.:0.9412 3rd Qu.:0.008677
Max. :0.024946 Max. :1.0000 Max. :0.030152
lift count
Min. : 8.036 Min. : 93.0
1st Qu.: 25.003 1st Qu.:101.0
Median : 57.604 Median :121.0
Mean : 58.506 Mean :130.1
3rd Qu.: 91.274 3rd Qu.:147.0
Max. :126.817 Max. :460.0
mining info:
# rule1 is ordered by confidence at this point: show the first five and the
# last five rules in that order.
inspect(head(rule1, n = 5))
inspect(tail(rule1, n = 5))
lhs rhs support confidence coverage lift count
[1] {SET OF 3 WOODEN HEART DECORATIONS,
SET OF 3 WOODEN SLEIGH DECORATIONS} => {SET OF 3 WOODEN STOCKING DECORATION} 0.006561822 0.8013245 0.008188720 53.92855 121
[2] {REGENCY MILK JUG PINK,
REGENCY SUGAR BOWL GREEN} => {REGENCY TEAPOT ROSES} 0.008947939 0.8009709 0.011171367 41.84108 165
[3] {PINK POLKADOT BOWL,
RED RETROSPOT BOWL} => {BLUE POLKADOT BOWL} 0.005422993 0.8000000 0.006778742 66.15247 100
[4] {SET OF 12 FAIRY CAKE BAKING CASES,
SET OF 6 SNACK LOAF BAKING CASES,
SET OF 6 TEA TIME BAKING CASES} => {SET OF 12 MINI LOAF BAKING CASES} 0.005856833 0.8000000 0.007321041 41.09192 108
[5] {LUNCH BAG APPLE DESIGN,
LUNCH BAG PINK POLKADOT,
LUNCH BAG WOODLAND} => {LUNCH BAG RED RETROSPOT} 0.006290672 0.8000000 0.007863341 11.45342 116
# Re-order rule1 by lift and show the first five rules in that order.
rule1 <- sort(rule1, by = "lift")
inspect(head(rule1, n = 5))
lhs rhs support confidence coverage lift count
[1] {DOLLY GIRL CHILDRENS CUP,
SPACEBOY CHILDRENS BOWL} => {DOLLY GIRL CHILDRENS BOWL} 0.005206074 0.9696970 0.005368764 126.8171 96
[2] {DOLLY GIRL CHILDRENS BOWL} => {DOLLY GIRL CHILDRENS CUP} 0.006344902 0.8297872 0.007646421 106.2589 117
[3] {DOLLY GIRL CHILDRENS CUP} => {DOLLY GIRL CHILDRENS BOWL} 0.006344902 0.8125000 0.007809111 106.2589 117
[4] {DOLLY GIRL CHILDRENS BOWL,
SPACEBOY CHILDRENS BOWL} => {DOLLY GIRL CHILDRENS CUP} 0.005206074 0.8135593 0.006399132 104.1808 96
[5] {HERB MARKER BASIL,
HERB MARKER MINT,
HERB MARKER PARSLEY,
HERB MARKER ROSEMARY,
HERB MARKER THYME} => {HERB MARKER CHIVES} 0.007158351 0.9166667 0.007809111 101.8273 132
# Plotting ----
# Draw rule1 with the default plot method, rendered as an htmlwidget.
plot(rule1, engine = "htmlwidget")
To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
# Draw rule1 with the "two-key" plot method, rendered as an htmlwidget.
plot(rule1, method = "two-key", engine = "htmlwidget")
To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Draw rule1 with the "graph" plot method, rendered as an htmlwidget.
# The recorded warning shows that only the best 100 rules by lift are drawn.
plot(rule1, method = "graph", engine = "htmlwidget")
Warning: Too many rules supplied. Only plotting the best 100 using ‘lift’ (change control parameter max if needed).
# Show the last five rules of rule2.
# NOTE(review): rule2 is not created anywhere in the visible part of this
# notebook; presumably it comes from an apriori() call analogous to the ones
# that build rule1 and rule3 -- confirm and restore that chunk.
inspect(tail(rule2, n = 5))
lhs rhs support confidence coverage lift count
[1] {HOME BUILDING BLOCK WORD} => {WHITE HANGING HEART T-LIGHT HOLDER} 0.011334056 0.3024602 0.03747289 2.829714 209
[2] {LUNCH BAG RED RETROSPOT} => {LUNCH BAG APPLE DESIGN} 0.021095445 0.3020186 0.06984816 6.430974 389
[3] {CHARLOTTE BAG SUKI DESIGN} => {CHARLOTTE BAG APPLES DESIGN} 0.009544469 0.3008547 0.03172451 13.053555 176
[4] {IVORY KITCHEN SCALES} => {MINT KITCHEN SCALES} 0.010086768 0.3000000 0.03362256 18.197368 186
[5] {JUMBO SHOPPER VINTAGE RED PAISLEY} => {JUMBO BAG BAROQUE BLACK WHITE} 0.012852495 0.3000000 0.04284165 8.523883 237
# Draw rule2 with the default plot method, rendered as an htmlwidget.
plot(rule2, engine = "htmlwidget")
To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Draw rule2 with the "two-key" plot method, rendered as an htmlwidget.
plot(rule2, method = "two-key", engine = "htmlwidget")
To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Draw rule2 with the "graph" plot method, rendered as an htmlwidget.
# The recorded warning shows that only the best 100 rules by lift are drawn.
plot(rule2, method = "graph", engine = "htmlwidget")
Warning: Too many rules supplied. Only plotting the best 100 using ‘lift’ (change control parameter max if needed).
# Mine association rules with minimum support 0.02 and minimum confidence
# 0.5, then order the resulting rule set by support.
rule3 <- apriori(market_basket, parameter = list(supp = 0.02, conf = 0.5))
rule3 <- sort(rule3, by = "support")
Apriori
Parameter specification:
Algorithmic control:
Absolute minimum support count: 368
set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[22346 item(s), 18440 transaction(s)] done [0.19s].
sorting and recoding items ... [208 item(s)] done [0.01s].
creating transaction tree ... done [0.01s].
checking subsets of size 1 2 3 done [0.01s].
writing ... [29 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
# Print the number of rules in rule3, the rule-length distribution and the
# summary statistics of the quality measures.
summary(rule3)
set of 29 rules
rule length distribution (lhs + rhs):sizes
2 3
26 3
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.000 2.000 2.000 2.103 2.000 3.000
summary of quality measures:
support confidence coverage
Min. :0.02028 Min. :0.5025 Min. :0.02364
1st Qu.:0.02251 1st Qu.:0.5594 1st Qu.:0.03444
Median :0.02364 Median :0.6355 Median :0.03812
Mean :0.02442 Mean :0.6467 Mean :0.03863
3rd Qu.:0.02538 3rd Qu.:0.6909 3rd Qu.:0.04284
Max. :0.02956 Max. :0.8945 Max. :0.05049
lift count
Min. : 5.799 Min. :374.0
1st Qu.: 7.552 1st Qu.:415.0
Median :14.151 Median :436.0
Mean :13.995 Mean :450.3
3rd Qu.:18.438 3rd Qu.:468.0
Max. :23.909 Max. :545.0
mining info:
# rule3 is ordered by support: show its first five rules.
inspect(head(rule3, n = 5))
lhs rhs support confidence coverage lift count
[1] {JUMBO BAG PINK POLKADOT} => {JUMBO BAG RED RETROSPOT} 0.02955531 0.6264368 0.04718004 7.22872 545
[2] {GREEN REGENCY TEACUP AND SAUCER} => {ROSES REGENCY TEACUP AND SAUCER} 0.02933839 0.7829233 0.03747289 18.43819 541
[3] {ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND SAUCER} 0.02933839 0.6909323 0.04246204 18.43819 541
[4] {ALARM CLOCK BAKELIKE GREEN} => {ALARM CLOCK BAKELIKE RED} 0.02879610 0.6730038 0.04278742 14.15073 531
[5] {ALARM CLOCK BAKELIKE RED} => {ALARM CLOCK BAKELIKE GREEN} 0.02879610 0.6054732 0.04755965 14.15073 531
# Show the first five rules of rule3.
# NOTE(review): this repeats the preceding inspect(head(...)) call on rule3
# and prints the same five rules -- confirm whether a different view (for
# example after re-sorting) was intended.
inspect(head(rule3, n = 5))
lhs rhs support confidence coverage lift count
[1] {JUMBO BAG PINK POLKADOT} => {JUMBO BAG RED RETROSPOT} 0.02955531 0.6264368 0.04718004 7.22872 545
[2] {GREEN REGENCY TEACUP AND SAUCER} => {ROSES REGENCY TEACUP AND SAUCER} 0.02933839 0.7829233 0.03747289 18.43819 541
[3] {ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND SAUCER} 0.02933839 0.6909323 0.04246204 18.43819 541
[4] {ALARM CLOCK BAKELIKE GREEN} => {ALARM CLOCK BAKELIKE RED} 0.02879610 0.6730038 0.04278742 14.15073 531
[5] {ALARM CLOCK BAKELIKE RED} => {ALARM CLOCK BAKELIKE GREEN} 0.02879610 0.6054732 0.04755965 14.15073 531
# rule3 is ordered by support: show its last five rules.
inspect(tail(rule3, n = 5))
lhs rhs support confidence coverage lift count
[1] {ALARM CLOCK BAKELIKE PINK} => {ALARM CLOCK BAKELIKE RED} 0.02147505 0.6460033 0.03324295 13.58301 396
[2] {GREEN REGENCY TEACUP AND SAUCER,
PINK REGENCY TEACUP AND SAUCER} => {ROSES REGENCY TEACUP AND SAUCER} 0.02114967 0.8478261 0.02494577 19.96668 390
[3] {PINK REGENCY TEACUP AND SAUCER,
ROSES REGENCY TEACUP AND SAUCER} => {GREEN REGENCY TEACUP AND SAUCER} 0.02114967 0.8944954 0.02364425 23.87047 390
[4] {GREEN REGENCY TEACUP AND SAUCER,
ROSES REGENCY TEACUP AND SAUCER} => {PINK REGENCY TEACUP AND SAUCER} 0.02114967 0.7208872 0.02933839 23.90856 390
[5] {GREEN REGENCY TEACUP AND SAUCER} => {REGENCY CAKESTAND 3 TIER} 0.02028200 0.5412446 0.03747289 5.86057 374
# Draw rule3 three ways, each rendered as an htmlwidget: the default plot
# method, the "two-key" method and the "graph" method.
plot(rule3, engine = "htmlwidget")
plot(rule3, method = "two-key", engine = "htmlwidget")
plot(rule3, method = "graph", engine = "htmlwidget")